setup

# Remove every non-hidden object from the current environment so the notebook
# starts without variables left over from earlier runs.
# NOTE(review): this does not detach packages or reset options(), so it is not
# equivalent to restarting R; it also wipes the caller's workspace if this file
# is ever source()d into an interactive session.
rm(list = ls())
# Packages used by the chunks below: dplyr (%>%, mutate, case_when, glimpse),
# ggplot2 (plotting) and mgcv (gam).
library(dplyr)
library(ggplot2)
library(mgcv)

download data

# Load the OpenPowerlifting export into `data.working`.
# The CSV is read from the dated extract directory; when it is not on disk yet
# the "latest" archive is downloaded and unpacked into the working directory
# first.
opl_csv_path <- file.path(
  "openpowerlifting-2020-06-20",
  "openpowerlifting-2020-06-20.csv"
)
opl_zip_path <- "openpowerlifting-latest.zip"

if (!file.exists(opl_csv_path)) {
  # mode = "wb": the archive is binary and must not go through text-mode
  # newline translation on Windows.
  download.file(
    "https://github.com/sstangl/openpowerlifting-static/raw/gh-pages/openpowerlifting-latest.zip",
    opl_zip_path,
    mode = "wb"
  )
  unzip(opl_zip_path)

  # The URL always serves the newest export, whose directory name carries its
  # own date. Fail with a clear message instead of an opaque read.csv() error
  # when the archive no longer contains the 2020-06-20 extract.
  if (!file.exists(opl_csv_path)) {
    stop(
      "Downloaded archive does not contain '", opl_csv_path,
      "'; the latest export probably has a newer date in its path.",
      call. = FALSE
    )
  }
}

data.working <- read.csv(opl_csv_path)

Glimpse & summary data

# Column-wise preview: one line per variable with its type and first values.
data.working %>% glimpse()
Observations: 1,979,433
Variables: 40
$ Name             <fct> Tucker Sanders, Xander Womack, Michael Womack III, Logan McCurley, Emily Jondron, Harley Sanders, Chris Sanders, Larry Cox,...
$ Sex              <fct> M, M, M, M, F, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, F, F, F, F, F, F, F, F, F, F, M, F, F, F, F, ...
$ Event            <fct> B, B, B, B, B, B, B, B, B, B, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD, SBD...
$ Equipment        <fct> Single-ply, Single-ply, Single-ply, Single-ply, Single-ply, Multi-ply, Multi-ply, Multi-ply, Multi-ply, Multi-ply, Wraps, W...
$ Age              <dbl> 15.0, 14.0, 16.0, 16.0, 21.0, 16.0, 42.0, 42.0, 44.0, 26.0, NA, 27.5, 32.5, NA, NA, 14.0, NA, 24.0, NA, 38.5, NA, NA, NA, N...
$ AgeClass         <fct> 13-15, 13-15, 16-17, 16-17, 20-23, 16-17, 40-44, 40-44, 40-44, 24-34, , 24-34, 24-34, , , 13-15, , 24-34, , 35-39, , , , , ...
$ BirthYearClass   <fct> 14-18, 14-18, 14-18, 14-18, 19-23, 14-18, 40-49, 40-49, 40-49, 24-39, , 24-39, 24-39, , , 14-18, , 24-39, , , , , , , , 24-...
$ Division         <fct> Teen 14-15, Teen 14-15, Teen 16-17, Teen 16-17, Juniors 20-25, Open, Masters 40-46, Masters 40-46, Masters 40-46, Open, Ope...
$ BodyweightKg     <dbl> 52.0, 100.0, 67.5, 75.0, 90.0, 75.0, 82.5, 90.0, 90.0, NA, 67.5, 82.5, 82.5, 90.0, 90.0, 90.0, 100.0, 110.0, 110.0, 110.0, ...
$ WeightClassKg    <fct> 52, 100, 67.5, 75, 90, 75, 82.5, 90, 90, 140+, 67.5, 82.5, 82.5, 90, 90, 90, 100, 110, 110, 110, 110, 110, 125, 140, 140, 1...
$ Squat1Kg         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Squat2Kg         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Squat3Kg         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Squat4Kg         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Best3SquatKg     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 136.08, 195.04, 183.70, 188.24, 165.56, 179.17, 260.82, 288.03, 249.48, 226.80, 179...
$ Bench1Kg         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Bench2Kg         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Bench3Kg         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Bench4Kg         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Best3BenchKg     <dbl> 85.00, 162.50, 110.00, 117.50, 160.00, 250.00, 165.00, 305.00, 350.00, 272.50, 72.57, 138.35, 111.13, 115.67, 92.99, NA, NA...
$ Deadlift1Kg      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Deadlift2Kg      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Deadlift3Kg      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Deadlift4Kg      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ Best3DeadliftKg  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 183.70, 249.48, 215.46, 208.65, 174.63, 210.92, 306.17, 317.51, 265.35, 226.80, 238...
$ TotalKg          <dbl> 85.00, 162.50, 110.00, 117.50, 160.00, 250.00, 165.00, 305.00, 350.00, 272.50, 392.36, 582.87, 510.29, 512.56, 433.18, NA, ...
$ Place            <fct> 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, DQ, DQ, 1, 2, 3, 4, 5, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2...
$ Dots             <dbl> 81.41, 100.02, 84.78, 84.30, 142.64, 179.36, 111.77, 197.21, 226.31, NA, 302.41, 394.83, 345.67, 331.42, 280.09, NA, NA, 45...
$ Wilks            <dbl> 83.41, 98.90, 84.81, 83.73, 138.25, 178.14, 110.53, 194.71, 223.44, NA, 302.52, 390.47, 341.85, 327.22, 276.54, NA, NA, 452...
$ Glossbrenner     <dbl> 82.14, 94.46, 82.33, 80.90, 120.17, 172.13, 106.36, 186.61, 214.14, NA, 293.65, 375.71, 328.92, 313.60, 265.04, NA, NA, 432...
$ Goodlift         <dbl> 49.90, 51.67, 46.65, 45.24, 81.45, 96.25, 58.98, 102.89, 118.07, NA, 60.44, 80.93, 70.85, 68.14, 57.59, NA, NA, 92.91, 81.4...
$ Tested           <fct> , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , 
$ Country          <fct> USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, , , , USA, , , USA, , USA, , , USA, , , USA, , , , , USA, USA, , USA...
$ Federation       <fct> USABPA, USABPA, USABPA, USABPA, USABPA, USABPA, USABPA, USABPA, USABPA, USABPA, USSports, USSports, USSports, USSports, USS...
$ ParentFederation <fct> , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , WPC, WPC, WPC, WPC, WPC, WPC, WPC, WPC, WPC, WPC,...
$ Date             <fct> 2019-04-06, 2019-04-06, 2019-04-06, 2019-04-06, 2019-04-06, 2019-04-06, 2019-04-06, 2019-04-06, 2019-04-06, 2019-04-06, 201...
$ MeetCountry      <fct> USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA, USA...
$ MeetState        <fct> TX, TX, TX, TX, TX, TX, TX, TX, TX, TX, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS, KS,...
$ MeetTown         <fct> Ft. Worth, Ft. Worth, Ft. Worth, Ft. Worth, Ft. Worth, Ft. Worth, Ft. Worth, Ft. Worth, Ft. Worth, Ft. Worth, Olathe, Olath...
$ MeetName         <fct> Ronnie Coleman Classic, Ronnie Coleman Classic, Ronnie Coleman Classic, Ronnie Coleman Classic, Ronnie Coleman Classic, Ron...
# Per-column summary: quantiles and NA counts for numeric columns, most
# frequent levels for factor columns.
data.working %>% summary()
                    Name         Sex          Event              Equipment            Age            AgeClass      BirthYearClass  
 Magomedamin Israpilov:    280   F : 489558   B  : 396980   Multi-ply : 100775   Min.   : 0.0            :665835          :810965  
 Alan Aerts           :    245   M :1489859   BD :  37648   Raw       : 701877   1st Qu.:21.0     24-34  :368171   24-39  :459291  
 Erik Rasmussen       :    198   Mx:     16   D  :  99502   Single-ply:1042117   Median :28.0     20-23  :203487   19-23  :226558  
 Sverre Paulsen       :    197                S  :   8001   Straps    :     40   Mean   :31.4     18-19  :173540   40-49  :177118  
 Bonnie Aerts         :    196                SB :   2642   Wraps     : 134624   3rd Qu.:39.5     16-17  :124271   14-18  :159371  
 Gordon Santee        :    168                SBD:1433116                        Max.   :98.0     35-39  :101251   50-59  : 90857  
 (Other)              :1978149                SD :   1544                        NA's   :865345   (Other):342878   (Other): 55273  
         Division       BodyweightKg    WeightClassKg        Squat1Kg          Squat2Kg          Squat3Kg          Squat4Kg        Best3SquatKg   
 Open        :556409   Min.   : 15.10   90     : 154501   Min.   :-555.0    Min.   :-580.0    Min.   :-600.5    Min.   :-550.0    Min.   :-477.5  
 Boys        :310096   1st Qu.: 67.00   75     : 144951   1st Qu.:  87.5    1st Qu.:  70.0    1st Qu.:-162.5    1st Qu.:-110.0    1st Qu.: 122.5  
 Girls       :141942   Median : 81.92   100    : 142517   Median : 145.0    Median : 145.0    Median : 110.0    Median : 130.0    Median : 170.0  
 Juniors     : 66814   Mean   : 84.11   82.5   : 135372   Mean   : 113.0    Mean   :  92.4    Mean   :  31.7    Mean   :  68.9    Mean   : 175.1  
 MR-O        : 39502   3rd Qu.: 99.00   110    : 103187   3rd Qu.: 200.0    3rd Qu.: 205.0    3rd Qu.: 192.5    3rd Qu.: 201.0    3rd Qu.: 220.0  
 Amateur Open: 37595   Max.   :260.20   67.5   : 101103   Max.   : 555.0    Max.   : 577.5    Max.   : 560.0    Max.   : 505.5    Max.   : 580.0  
 (Other)     :827075   NA's   :27067    (Other):1197802   NA's   :1498188   NA's   :1503907   NA's   :1517871   NA's   :1974491   NA's   :605421  
    Bench1Kg          Bench2Kg          Bench3Kg          Bench4Kg        Best3BenchKg     Deadlift1Kg       Deadlift2Kg       Deadlift3Kg     
 Min.   :-502.5    Min.   :-575.0    Min.   :-575.0    Min.   :-500.0    Min.   :-522.5   Min.   :-461.0    Min.   :-470.0    Min.   :-587.5   
 1st Qu.:  56.7    1st Qu.: -50.0    1st Qu.:-137.5    1st Qu.:-128.0    1st Qu.:  75.0   1st Qu.: 125.0    1st Qu.: 115.0    1st Qu.:-207.5   
 Median : 105.0    Median :  95.0    Median : -60.0    Median :  75.0    Median : 115.0   Median : 180.0    Median : 177.5    Median : 117.5   
 Mean   :  83.7    Mean   :  55.2    Mean   : -18.2    Mean   :  22.2    Mean   : 118.2   Mean   : 160.9    Mean   : 130.1    Mean   :  15.2   
 3rd Qu.: 145.0    3rd Qu.: 145.0    3rd Qu.: 117.5    3rd Qu.: 156.2    3rd Qu.: 152.5   3rd Qu.: 225.0    3rd Qu.: 230.0    3rd Qu.: 205.0   
 Max.   : 467.5    Max.   : 487.5    Max.   : 478.5    Max.   : 487.6    Max.   : 488.5   Max.   : 450.0    Max.   : 460.4    Max.   : 457.5   
 NA's   :1241513   NA's   :1250750   NA's   :1273724   NA's   :1966266   NA's   :227920   NA's   :1444994   NA's   :1455794   NA's   :1479964  
  Deadlift4Kg      Best3DeadliftKg     TotalKg           Place             Dots            Wilks         Glossbrenner       Goodlift      Tested       
 Min.   :-461.0    Min.   :-410.0   Min.   :   1.0   1      :768392   Min.   :  0.68   Min.   :  0.67   Min.   :  0.64   Min.   :  0.50      : 526427  
 1st Qu.:-117.5    1st Qu.: 140.0   1st Qu.: 217.7   2      :303634   1st Qu.:167.30   1st Qu.:166.56   1st Qu.:156.78   1st Qu.: 51.98   Yes:1453006  
 Median : 143.0    Median : 188.2   Median : 369.7   3      :189861   Median :303.25   Median :302.34   Median :282.75   Median : 63.31                
 Mean   :  75.4    Mean   : 189.1   Mean   : 388.8   DQ     :134073   Mean   :283.48   Mean   :282.47   Mean   :266.23   Mean   : 63.77                
 3rd Qu.: 208.5    3rd Qu.: 235.0   3rd Qu.: 540.0   4      :131489   3rd Qu.:375.80   3rd Qu.:374.24   3rd Qu.:354.59   3rd Qu.: 75.01                
 Max.   : 440.5    Max.   : 460.4   Max.   :1407.5   5      : 96919   Max.   :795.22   Max.   :793.33   Max.   :756.90   Max.   :146.49                
 NA's   :1966724   NA's   :515012   NA's   :146611   (Other):355065   NA's   :163883   NA's   :163883   NA's   :163883   NA's   :305726                
    Country          Federation      ParentFederation         Date            MeetCountry        MeetState                MeetTown      
        :1028206   THSPA  : 321715   IPF    :813960   2018-02-17:   7109   USA      :1138766          :572622                 : 257810  
 USA    : 478414   USAPL  : 156312          :770470   2017-02-18:   7080   Russia   : 196552   TX     :519344   Moscow        :  35323  
 Russia :  90091   THSWPA : 143196   WPC    :120048   2017-01-21:   6567   Canada   :  56975   CA     : 68596   Las Vegas     :  25234  
 Canada :  50964   USPF   : 116369   IPL    :120046   2019-01-19:   6566   Ukraine  :  53622   FL     : 41238   St. Petersburg:  14927  
 Ukraine:  31010   USPA   : 104557   GPC    : 35876   2019-02-16:   6479   Australia:  48032   PA     : 39846   Yekaterinburg :  10952  
 Germany:  27235   FPR    :  75021   WRPF   : 30766   2019-02-09:   6392   Germany  :  39643   MOW    : 38001   Columbus      :   9524  
 (Other): 273513   (Other):1062263   (Other): 88267   (Other)   :1939240   (Other)  : 445843   (Other):699786   (Other)       :1625663  
                   MeetName      
 World Championships   :  52564  
 European Championships:  20634  
 World Cup             :  12953  
 Nationals             :   9913  
 Raw Nationals         :   8177  
 High School Nationals :   7918  
 (Other)               :1867274  

filter data

creating features

# Derive modelling features on `data.working`:
#   AgeBucket  - coarse age group built from Age, stored as a factor
#   Federation - coerced to a factor
#
# The bucket bounds are contiguous half-open intervals. Age contains half-year
# values (e.g. 27.5, 32.5, 38.5), so closed integer ranges such as
# "20 to 23" / "24 to 35" would leave ages like 19.5, 23.5 and 35.5 unmatched
# and push them into the "ERROR" fallback. Every whole-number age lands in the
# same bucket as with the integer ranges.
data.working <- data.working %>%
  mutate(
    AgeBucket = as.factor(case_when(
      # case_when() takes the first matching branch, so each upper bound
      # implicitly excludes the rows already claimed above it.
      Age < 20 ~ "Younger",
      Age < 24 ~ "Junior",
      Age < 36 ~ "Open",
      Age >= 36 ~ "Master",
      # Only reachable when Age is NA (all comparisons above evaluate to NA).
      TRUE ~ "ERROR"
    )),
    Federation = as.factor(Federation)
  )

scatter plots by sex, age bucket, and federation

GAM #1

# GAM #1: model TotalKg for male lifters as a smooth function of bodyweight,
# with random intercepts (bs = "re") for age bucket and federation, fitted by
# REML.
#
# The smooth is over BodyweightKg only. The response must not appear on the
# right-hand side: a term such as te(TotalKg, BodyweightKg) lets the model
# predict TotalKg from itself, which makes the fit meaningless.
# NOTE(review): s(BodyweightKg) is the reviewer's reading of the intent (the
# notebook plots TotalKg against BodyweightKg); swap in te(Age, BodyweightKg)
# if a bodyweight-by-age surface was meant.
gam1 <- gam(
  TotalKg ~ s(BodyweightKg) + s(AgeBucket, bs = "re") + s(Federation, bs = "re"),
  data = data.working[data.working$Sex == "M", ],
  method = "REML"
)
LS0tCnRpdGxlOiAib3Blbi1wb3dlcmxpZnRpbmctR0FNIgphdXRob3I6ICJKb2huIE15c2xpbnNraSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQojIHNldHVwCmBgYHtyIHNldHVwLCByZXN1bHRzID0gJ2hpZGUnLCB3YXJuaW5nPUZBTFNFfQpybShsaXN0ID0gbHMoKSkKCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShtZ2N2KQpgYGAKCiMgZG93bmxvYWQgZGF0YQpgYGB7ciBkb3dubG9hZCwgcmVzdWx0cyA9ICdoaWRlJywgd2FybmluZz1GQUxTRX0KaWYgKGZpbGUuZXhpc3RzKCJvcGVucG93ZXJsaWZ0aW5nLTIwMjAtMDYtMjAvb3BlbnBvd2VybGlmdGluZy0yMDIwLTA2LTIwLmNzdiIpKXsKICBkYXRhLndvcmtpbmcgPC0gcmVhZC5jc3YoIm9wZW5wb3dlcmxpZnRpbmctMjAyMC0wNi0yMC9vcGVucG93ZXJsaWZ0aW5nLTIwMjAtMDYtMjAuY3N2IikKfSBlbHNlIHsKICBkb3dubG9hZC5maWxlKCJodHRwczovL2dpdGh1Yi5jb20vc3N0YW5nbC9vcGVucG93ZXJsaWZ0aW5nLXN0YXRpYy9yYXcvZ2gtcGFnZXMvb3BlbnBvd2VybGlmdGluZy1sYXRlc3QuemlwIiwKICAgICAgICAgICAgICAib3BlbnBvd2VybGlmdGluZy1sYXRlc3QuemlwIikKICB1bnppcCgib3BlbnBvd2VybGlmdGluZy1sYXRlc3QuemlwIikKICAKICBkYXRhLndvcmtpbmcgPC0gcmVhZC5jc3YoIm9wZW5wb3dlcmxpZnRpbmctMjAyMC0wNi0yMC9vcGVucG93ZXJsaWZ0aW5nLTIwMjAtMDYtMjAuY3N2IikKfQoKCgoKYGBgCgojIEdsaW1wc2UgJiBzdW1tYXJ5IGRhdGEKYGBge3IgZ2xpbXBzZX0KZ2xpbXBzZShkYXRhLndvcmtpbmcpCnN1bW1hcnkoZGF0YS53b3JraW5nKQpgYGAKCiMgZmlsdGVyIGRhdGEKYGBge3J9CmRhdGEud29ya2luZyA8LSBkYXRhLndvcmtpbmcgJT4lCiAgZmlsdGVyKEV2ZW50ID09ICJTQkQiLAogICAgICAgICBFcXVpcG1lbnQgPT0gIlJhdyIsCiAgICAgICAgICFpcy5uYShBZ2UpLAogICAgICAgICAhaXMubmEoQm9keXdlaWdodEtnKSwKICAgICAgICAgIWlzLm5hKFRvdGFsS2cpLAogICAgICAgICBQYXJlbnRGZWRlcmF0aW9uID09ICJJUEYiKQpgYGAKCiMgY3JlYXRpbmcgZmVhdHVyZXMKYGBge3J9CmRhdGEud29ya2luZyA8LSBkYXRhLndvcmtpbmcgJT4lCiAgbXV0YXRlKEFnZUJ1Y2tldCA9IGFzLmZhY3RvcihjYXNlX3doZW4oCiAgICAgICAgIEFnZSA8PSAxOSB+ICJZb3VuZ2VyIiwKICAgICAgICAgQWdlID49IDIwICYgQWdlIDw9IDIzIH4gIkp1bmlvciIsCiAgICAgICAgIEFnZSA+PSAyNCAmIEFnZSA8PSAzNSB+ICJPcGVuIiwKICAgICAgICAgQWdlID49IDM2IH4gIk1hc3RlciIsCiAgICAgICAgIFRSVUUgfiAiRVJST1IiKSkpICU+JQogIG11dGF0ZShGZWRlcmF0aW9uID0gYXMuZmFjdG9yKEZlZGVyYXRpb24pKQpgYGAKCgojIHNjYXR0ZXIgcGxvdHMgYnkgZ2VuZGVyCmBgYHtyfQpsb29wX3ZhcnMgPC0gYygnU2V4JywgJ0FnZUJ1Y2tldCcsICdGZWRlcmF0aW9uJykKZm9yIChlYWNoIGluIGxvb3Bf
dmFycyl7CiAgZ2cgPC0gZ2dwbG90KGRhdGEud29ya2luZykgKwogICAgZ2VvbV9wb2ludChhZXNfc3RyaW5nKHggPSAiQm9keXdlaWdodEtnIiwgeSA9ICJUb3RhbEtnIiwgY29sb3IgPSBlYWNoLCBncm91cCA9IGVhY2gpLCBhbHBoYSA9IC41KSsKICAgIGZhY2V0X3dyYXAocGFzdGUoIn4iLGVhY2gpKQogIAogIHByaW50KGdnKQp9CmBgYAoKIyBHQU0gIzEKYGBge3J9CmdhbTEgPC0gZ2FtKFRvdGFsS2cgfiB0ZShUb3RhbEtnLCBCb2R5d2VpZ2h0S2cpICsgcyhBZ2VCdWNrZXQsIGJzID0gInJlIikgKyBzKEZlZGVyYXRpb24sIGJzID0gInJlIiksCiAgICAgICAgICAgIGRhdGEgPSBkYXRhLndvcmtpbmdbZGF0YS53b3JraW5nJFNleCA9PSAiTSIsXSwKICAgICAgICAgICAgbWV0aG9kID0gIlJFTUwiKQoKZGF0YS53b3JraW5nW2RhdGEud29ya2luZyRTZXggPT0gIk0iLCJnYW0xIl0gPC0gcHJlZGljdChnYW0xLCB0eXBlID0gInJlc3BvbnNlIikKYGBgCgoK